From 42840634639f9503f49b1b1a6256acd6938a8644 Mon Sep 17 00:00:00 2001 From: "iap10@labyrinth.cl.cam.ac.uk" Date: Mon, 9 Aug 2004 09:38:18 +0000 Subject: [PATCH] bitkeeper revision 1.1108.57.1 (4117460ajav0OLcX9y8IQtXQMz0RvQ) step 1 of fixing migration support after the interface changes --- .../arch/xen/kernel/setup.c | 89 +++----- .../drivers/xen/netfront/netfront.c | 4 + .../drivers/xen/privcmd/privcmd.c | 12 ++ .../include/asm-xen/proc_cmd.h | 4 +- tools/libxc/xc_linux_save.c | 195 +++++++++++------- tools/libxc/xc_private.c | 15 ++ tools/libxc/xc_private.h | 3 + xen/arch/x86/domain.c | 2 + xen/include/hypervisor-ifs/arch-x86_32.h | 6 + xen/include/hypervisor-ifs/hypervisor-if.h | 2 + 10 files changed, 193 insertions(+), 139 deletions(-) diff --git a/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c b/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c index 039fdaf162..c26e9bb2ef 100644 --- a/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c +++ b/linux-2.4.26-xen-sparse/arch/xen/kernel/setup.c @@ -60,7 +60,7 @@ static int errno; */ shared_info_t *HYPERVISOR_shared_info = (shared_info_t *)empty_zero_page; -unsigned long *phys_to_machine_mapping; +unsigned long *phys_to_machine_mapping, *pfn_to_mfn_frame_list; multicall_entry_t multicall_list[8]; int nr_multicall_ents = 0; @@ -194,6 +194,7 @@ int xen_module_init(struct module *mod) void __init setup_arch(char **cmdline_p) { + int i,j; unsigned long bootmap_size, start_pfn, max_low_pfn; extern void hypervisor_callback(void); @@ -343,6 +344,22 @@ void __init setup_arch(char **cmdline_p) paging_init(); + pfn_to_mfn_frame_list = alloc_bootmem_low_pages(PAGE_SIZE); + for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) + { + pfn_to_mfn_frame_list[j] = + virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT; + } +//pfn_to_mfn_frame_list[0] = 0xdeadbeff; +printk("Hsi %lx %lx :: %lx\n", pfn_to_mfn_frame_list, + virt_to_machine(pfn_to_mfn_frame_list), +HYPERVISOR_shared_info->arch.mfn_to_pfn_start + ); + HYPERVISOR_shared_info->arch.pfn_to_mfn_frame_list = + virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; + + + /* If we are a privileged guest OS then we should request IO privileges. */ if ( start_info.flags & SIF_PRIVILEGED ) { @@ -1167,50 +1184,16 @@ static void __do_suspend(void) extern void time_suspend(void); extern void time_resume(void); - unsigned long *pfn_to_mfn_frame_list = NULL; suspend_record_t *suspend_record = NULL; - struct net_device *dev; - char name[6]; - int i, j; - if ( (pfn_to_mfn_frame_list = (unsigned long *)__get_free_page(GFP_KERNEL)) - == NULL ) - goto out; if ( (suspend_record = (suspend_record_t *)__get_free_page(GFP_KERNEL)) == NULL ) goto out; - suspend_record->pfn_to_mfn_frame_list = - virt_to_machine(pfn_to_mfn_frame_list) >> PAGE_SHIFT; - suspend_record->nr_pfns = max_pfn; - - for ( i=0, j=0; i < max_pfn; i+=(PAGE_SIZE/sizeof(unsigned long)), j++ ) - { - pfn_to_mfn_frame_list[j] = - virt_to_machine(&phys_to_machine_mapping[i]) >> PAGE_SHIFT; - } - /* - * NB. This is /not/ a full dev_close() as that loses route information! - * Instead we do essentialy the same as dev_close() but without notifying - * various registered subsystems about the NETDEV_DOWN event. - */ - rtnl_lock(); - for ( i = 0; i < 10; i++ ) - { - sprintf(name, "eth%d", i); - if ( ((dev = __dev_get_by_name(name)) != NULL) && - (dev->flags & IFF_UP) ) - { - dev_deactivate(dev); - clear_bit(__LINK_STATE_START, &dev->state); - if ( dev->stop != NULL ) - dev->stop(dev); - dev->flags &= ~IFF_UP; - } - } - rtnl_unlock(); + suspend_record->nr_pfns = max_pfn; /* final number of pfns */ - blkdev_suspend(); + //netdev_suspend(); + //blkdev_suspend(); __cli(); @@ -1245,36 +1228,10 @@ static void __do_suspend(void) __sti(); - blkdev_resume(); - - /* - * We now do the opposite of the network suspend code. Basically it's - * dev_open() but without notifying anyone about NETDEV_UP. - */ - rtnl_lock(); - for ( i = 0; i < 10; i++ ) - { - sprintf(name, "eth%d", i); - if ( ((dev = __dev_get_by_name(name)) != NULL) && - !(dev->flags & IFF_UP) ) - { - set_bit(__LINK_STATE_START, &dev->state); - if ( (dev->open == NULL) || (dev->open(dev) == 0) ) - { - dev->flags |= IFF_UP; - dev_activate(dev); - } - else - { - clear_bit(__LINK_STATE_START, &dev->state); - } - } - } - rtnl_unlock(); + //blkdev_resume(); + //netdev_resume(); out: - if ( pfn_to_mfn_frame_list != NULL ) - free_page((unsigned long)pfn_to_mfn_frame_list); if ( suspend_record != NULL ) free_page((unsigned long)suspend_record); } diff --git a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c index 1c55f2cb44..c85e30610e 100644 --- a/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c +++ b/linux-2.6.7-xen-sparse/drivers/xen/netfront/netfront.c @@ -256,6 +256,10 @@ static void network_alloc_rx_buffers(struct net_device *dev) rx_pfn_array[nr_pfns] = virt_to_machine(skb->head) >> PAGE_SHIFT; + /* remove this page from pseudo phys map (migration optimization) */ + phys_to_machine_mapping[virt_to_phys(skb->head) >> PAGE_SHIFT] + = 0x80000001; + rx_mcl[nr_pfns].op = __HYPERVISOR_update_va_mapping; rx_mcl[nr_pfns].args[0] = (unsigned long)skb->head >> PAGE_SHIFT; rx_mcl[nr_pfns].args[1] = 0; diff --git a/linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c b/linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c index 8abc2331fe..7f1f8c94e0 100644 --- a/linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c +++ b/linux-2.6.7-xen-sparse/drivers/xen/privcmd/privcmd.c @@ -181,6 +181,18 @@ static int privcmd_ioctl(struct inode *inode, struct file *file, break; #endif + case IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN: + { + unsigned long m2p_start_mfn = + HYPERVISOR_shared_info->arch.mfn_to_pfn_start; + + if( put_user( m2p_start_mfn, (unsigned long *) data ) ) + ret = -EFAULT; + else + ret = 0; + } + break; + default: ret = -EINVAL; break; diff --git a/linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h b/linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h index 08e452de15..4292427c68 100644 --- a/linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h +++ b/linux-2.6.7-xen-sparse/include/asm-xen/proc_cmd.h @@ -58,6 +58,8 @@ typedef struct privcmd_blkmsg #define IOCTL_PRIVCMD_MMAP \ _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmap_t)) #define IOCTL_PRIVCMD_MMAPBATCH \ - _IOC(_IOC_NONE, 'P', 2, sizeof(privcmd_mmapbatch_t)) + _IOC(_IOC_NONE, 'P', 3, sizeof(privcmd_mmapbatch_t)) +#define IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN \ + _IOC(_IOC_READ, 'P', 4, sizeof(unsigned long)) #endif /* __PROC_CMD_H__ */ diff --git a/tools/libxc/xc_linux_save.c b/tools/libxc/xc_linux_save.c index c74e209bdc..12375e837a 100644 --- a/tools/libxc/xc_linux_save.c +++ b/tools/libxc/xc_linux_save.c @@ -12,7 +12,7 @@ #define BATCH_SIZE 1024 /* 1024 pages (4MB) at a time */ -#define DEBUG 0 +#define DEBUG 1 #define DDEBUG 0 #if DEBUG @@ -32,6 +32,8 @@ * in the guest's pseudophysical map. * 0x80000000-3 mark the shared_info, and blk/net rings */ + +#if 0 #define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ (((_mfn) < (1024*1024)) && \ (((live_mfn_to_pfn_table[_mfn] < nr_pfns) && \ @@ -39,7 +41,13 @@ ((live_mfn_to_pfn_table[_mfn] >= 0x80000000) && \ (live_mfn_to_pfn_table[_mfn] <= 0x80000003)) || \ (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == 0x80000004))) - +#endif +#define MFN_IS_IN_PSEUDOPHYS_MAP(_mfn) \ + (((_mfn) < (1024*1024)) && \ + ((live_mfn_to_pfn_table[_mfn] < nr_pfns) && \ + (live_pfn_to_mfn_table[live_mfn_to_pfn_table[_mfn]] == (_mfn)))) + + /* Returns TRUE if MFN is successfully converted to a PFN. */ #define translate_mfn_to_pfn(_pmfn) \ ({ \ @@ -238,7 +246,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) int rc = 1, i, j, k, last_iter, iter = 0; unsigned long mfn; u32 domid = ioctxt->domain; - int live = (ioctxt->flags & XCFLAGS_LIVE); + int live = 0; // (ioctxt->flags & XCFLAGS_LIVE); int debug = (ioctxt->flags & XCFLAGS_DEBUG); int sent_last_iter, skip_this_iter; @@ -270,9 +278,10 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) unsigned long *live_pfn_to_mfn_table = NULL; /* Live mapping of system MFN to PFN table. */ unsigned long *live_mfn_to_pfn_table = NULL; + unsigned long mfn_to_pfn_table_start_mfn; /* Live mapping of shared info structure */ - unsigned long *live_shinfo; + shared_info_t *live_shinfo; /* base of the region in which domain memory is mapped */ unsigned char *region_base = NULL; @@ -302,12 +311,6 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) return 1; } - /* Ensure that the domain exists, and that it is stopped. */ - if ( xc_domain_pause(xc_handle, domid) ){ - xcio_perror(ioctxt, "Could not pause domain"); - goto out; - } - if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) ) { xcio_error(ioctxt, "Could not get full domain info"); @@ -321,22 +324,23 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) xcio_error(ioctxt, "Domain is not in a valid Linux guest OS state"); goto out; } + + nr_pfns = op.u.getdomaininfo.max_pages; - /* Map the suspend-record MFN to pin it. The page must be owned by - domid for this to succeed. */ - p_srec = mfn_mapper_map_single(xc_handle, domid, - sizeof(*p_srec), PROT_READ, - ctxt.cpu_ctxt.esi); - if (!p_srec){ - xcio_error(ioctxt, "Couldn't map state record"); + /* cheesy sanity check */ + if ( nr_pfns > 1024*1024 ){ + xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns); goto out; } - nr_pfns = p_srec->nr_pfns; - /* cheesy sanity check */ - if ( nr_pfns > 1024*1024 ){ - xcio_error(ioctxt, "Invalid state record -- pfn count out of range: %lu", nr_pfns); + /* Map the shared info frame */ + live_shinfo = mfn_mapper_map_single(xc_handle, domid, + PAGE_SIZE, PROT_READ, + shared_info_frame); + + if (!live_shinfo){ + xcio_error(ioctxt, "Couldn't map live_shinfo"); goto out; } @@ -344,30 +348,13 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) live_pfn_to_mfn_frame_list = mfn_mapper_map_single(xc_handle, domid, PAGE_SIZE, PROT_READ, - p_srec->pfn_to_mfn_frame_list ); + live_shinfo->arch.pfn_to_mfn_frame_list ); if (!live_pfn_to_mfn_frame_list){ xcio_error(ioctxt, "Couldn't map pfn_to_mfn_frame_list"); goto out; } - /* Track the mfn_to_pfn table down from the domains PT */ - { - unsigned long *pgd; - unsigned long mfn_to_pfn_table_start_mfn; - - pgd = mfn_mapper_map_single(xc_handle, domid, - PAGE_SIZE, PROT_READ, - ctxt.pt_base>>PAGE_SHIFT); - - mfn_to_pfn_table_start_mfn = - pgd[HYPERVISOR_VIRT_START>>L2_PAGETABLE_SHIFT]>>PAGE_SHIFT; - - live_mfn_to_pfn_table = - mfn_mapper_map_single(xc_handle, ~0UL, - PAGE_SIZE*1024, PROT_READ, - mfn_to_pfn_table_start_mfn ); - } /* Map all the frames of the pfn->mfn table. For migrate to succeed, the guest must not change which frames are used for this purpose. @@ -383,9 +370,17 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) goto out; } + /* Setup the mfn_to_pfn table mapping */ + mfn_to_pfn_table_start_mfn = xc_get_m2p_start_mfn( xc_handle ); + + live_mfn_to_pfn_table = + mfn_mapper_map_single(xc_handle, 0x7FFFFFFF, + PAGE_SIZE*1024, PROT_READ, + mfn_to_pfn_table_start_mfn ); /* Canonicalise the pfn-to-mfn table frame-number list. */ memcpy( pfn_to_mfn_frame_list, live_pfn_to_mfn_frame_list, PAGE_SIZE ); + for ( i = 0; i < nr_pfns; i += 1024 ){ if ( !translate_mfn_to_pfn(&pfn_to_mfn_frame_list[i/1024]) ){ xcio_error(ioctxt, "Frame # in pfn-to-mfn frame list is not in pseudophys"); @@ -393,8 +388,8 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) } } - /* At this point, we can start the domain again if we're doing a - live suspend */ + + /* Domain is still running at this point */ if( live ){ if ( xc_shadow_control( xc_handle, domid, @@ -404,15 +399,31 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) goto out; } - if ( xc_domain_unpause(xc_handle, domid) < 0 ){ - xcio_error(ioctxt, "Couldn't unpause domain"); - goto out; - } - last_iter = 0; sent_last_iter = 1<<20; /* 4GB of pages */ } else{ + /* This is a non-live suspend. Issue the call back to get the + domain suspended */ + last_iter = 1; + + xcio_suspend_domain(ioctxt); + + if ( xc_domain_getfullinfo( xc_handle, domid, &op, &ctxt) ) + { + xcio_error(ioctxt, "Could not get full domain info"); + goto out; + } + + if ( (op.u.getdomaininfo.flags & + ( DOMFLAGS_SHUTDOWN | (SHUTDOWN_suspend<nr_pfns ); + + if (nr_pfns != p_srec->nr_pfns ) { - xcio_perror(ioctxt, "Could not get full domain info"); + xcio_error(ioctxt, "Suspend record nr_pfns unexpected (%ld != %ld)", + p_srec->nr_pfns, nr_pfns); goto out; } /* Canonicalise the suspend-record frame number. */ if ( !translate_mfn_to_pfn(&ctxt.cpu_ctxt.esi) ){ - xcio_error(ioctxt, "State record is not in range of pseudophys map"); + xcio_error(ioctxt, "Suspend record is not in range of pseudophys map"); goto out; } - + /* Canonicalise each GDT frame number. */ for ( i = 0; i < ctxt.gdt_ents; i += 512 ) { if ( !translate_mfn_to_pfn(&ctxt.gdt_frames[i]) ) { @@ -831,7 +882,7 @@ int xc_linux_save(int xc_handle, XcIOContext *ioctxt) goto out; } munmap(live_shinfo, PAGE_SIZE); - +printf("Everything saved OK!\n"); out: if ( pfn_type != NULL ) free(pfn_type); DPRINTF("Save exit rc=%d\n",rc); diff --git a/tools/libxc/xc_private.c b/tools/libxc/xc_private.c index 8807f8a9bf..4388836078 100644 --- a/tools/libxc/xc_private.c +++ b/tools/libxc/xc_private.c @@ -317,3 +317,18 @@ unsigned long csum_page (void * page) return sum ^ (sum>>32); } + +unsigned long xc_get_m2p_start_mfn ( int xc_handle ) +{ + unsigned long mfn; + + if ( ioctl( xc_handle, IOCTL_PRIVCMD_GET_MACH2PHYS_START_MFN, &mfn ) < 0 ) + { + perror("xc_get_m2p_start_mfn:"); + return 0; + } + return mfn; +} + + + diff --git a/tools/libxc/xc_private.h b/tools/libxc/xc_private.h index 5a1cc1d122..3d0572501b 100644 --- a/tools/libxc/xc_private.h +++ b/tools/libxc/xc_private.h @@ -209,4 +209,7 @@ int xc_domain_getfullinfo(int xc_handle, u32 domid, dom0_op_t *op, full_execution_context_t *ctxt ); + +unsigned long xc_get_m2p_start_mfn ( int xc_handle ); + #endif /* __XC_PRIVATE_H__ */ diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c index 6f6b7b620f..7ca89d8b64 100644 --- a/xen/arch/x86/domain.c +++ b/xen/arch/x86/domain.c @@ -219,6 +219,8 @@ void arch_do_createdomain(struct domain *d) { d->shared_info = (void *)alloc_xenheap_page(); memset(d->shared_info, 0, PAGE_SIZE); + d->shared_info->arch.mfn_to_pfn_start = + virt_to_phys(&machine_to_phys_mapping[0])>>PAGE_SHIFT; SHARE_PFN_WITH_DOMAIN(virt_to_page(d->shared_info), d); machine_to_phys_mapping[virt_to_phys(d->shared_info) >> PAGE_SHIFT] = 0x80000000UL; /* debug */ diff --git a/xen/include/hypervisor-ifs/arch-x86_32.h b/xen/include/hypervisor-ifs/arch-x86_32.h index 80055a5062..04edd474ea 100644 --- a/xen/include/hypervisor-ifs/arch-x86_32.h +++ b/xen/include/hypervisor-ifs/arch-x86_32.h @@ -131,6 +131,12 @@ typedef struct { unsigned long failsafe_callback_eip; } PACKED full_execution_context_t; +typedef struct { + u64 mfn_to_pfn_start; /* MFN of start of m2p table */ + u64 pfn_to_mfn_frame_list; /* MFN of a table of MFNs that + make up p2m table */ +} PACKED arch_shared_info_t; + #define ARCH_HAS_FAST_TRAP #endif diff --git a/xen/include/hypervisor-ifs/hypervisor-if.h b/xen/include/hypervisor-ifs/hypervisor-if.h index b6231e5ec5..1811b11297 100644 --- a/xen/include/hypervisor-ifs/hypervisor-if.h +++ b/xen/include/hypervisor-ifs/hypervisor-if.h @@ -318,6 +318,8 @@ typedef struct shared_info_st execution_context_t execution_context; /* 328 */ + arch_shared_info_t arch; + } PACKED shared_info_t; /* -- 2.30.2